{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import Module"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import technical Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Open</th>\n",
       "      <th>High</th>\n",
       "      <th>Low</th>\n",
       "      <th>Close</th>\n",
       "      <th>Adj Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>lrets</th>\n",
       "      <th>MACD</th>\n",
       "      <th>stochastics</th>\n",
       "      <th>ATR</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1990-01-04</th>\n",
       "      <td>358.760010</td>\n",
       "      <td>358.760010</td>\n",
       "      <td>352.890015</td>\n",
       "      <td>355.670013</td>\n",
       "      <td>355.670013</td>\n",
       "      <td>177000000</td>\n",
       "      <td>-0.008650</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.869995</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1990-01-05</th>\n",
       "      <td>355.670013</td>\n",
       "      <td>355.670013</td>\n",
       "      <td>351.350006</td>\n",
       "      <td>352.200012</td>\n",
       "      <td>352.200012</td>\n",
       "      <td>158530000</td>\n",
       "      <td>-0.009804</td>\n",
       "      <td>-0.036878</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.320007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1990-01-08</th>\n",
       "      <td>352.200012</td>\n",
       "      <td>354.239990</td>\n",
       "      <td>350.540009</td>\n",
       "      <td>353.790009</td>\n",
       "      <td>353.790009</td>\n",
       "      <td>140110000</td>\n",
       "      <td>0.004504</td>\n",
       "      <td>-0.005628</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.699981</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1990-01-10</th>\n",
       "      <td>349.619995</td>\n",
       "      <td>349.619995</td>\n",
       "      <td>344.320007</td>\n",
       "      <td>347.309998</td>\n",
       "      <td>347.309998</td>\n",
       "      <td>175990000</td>\n",
       "      <td>-0.006629</td>\n",
       "      <td>-0.160033</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.470002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1990-01-11</th>\n",
       "      <td>347.309998</td>\n",
       "      <td>350.140015</td>\n",
       "      <td>347.309998</td>\n",
       "      <td>348.529999</td>\n",
       "      <td>348.529999</td>\n",
       "      <td>154390000</td>\n",
       "      <td>0.003507</td>\n",
       "      <td>-0.170328</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.830017</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Open        High         Low       Close   Adj Close  \\\n",
       "Date                                                                     \n",
       "1990-01-04  358.760010  358.760010  352.890015  355.670013  355.670013   \n",
       "1990-01-05  355.670013  355.670013  351.350006  352.200012  352.200012   \n",
       "1990-01-08  352.200012  354.239990  350.540009  353.790009  353.790009   \n",
       "1990-01-10  349.619995  349.619995  344.320007  347.309998  347.309998   \n",
       "1990-01-11  347.309998  350.140015  347.309998  348.529999  348.529999   \n",
       "\n",
       "               Volume     lrets      MACD  stochastics       ATR  \n",
       "Date                                                              \n",
       "1990-01-04  177000000 -0.008650  0.000000          NaN  5.869995  \n",
       "1990-01-05  158530000 -0.009804 -0.036878          NaN  4.320007  \n",
       "1990-01-08  140110000  0.004504 -0.005628          NaN  3.699981  \n",
       "1990-01-10  175990000 -0.006629 -0.160033          NaN  9.470002  \n",
       "1990-01-11  154390000  0.003507 -0.170328          NaN  2.830017  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_hdf(\"technical500.h5\", \"data\")\n",
    "#df.index = pd.to_datetime(df.index, format='%Y-%m-%d')\n",
    "#df.dropna(axis=0, inplace=True)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import News data and convert it from Json to pandas dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def open_json(year, month):\n",
    "    \"Save as pandas dataframe\"\n",
    "    file_str = 'data/' + str(year) + '-' + '{:02}'.format(month) + '.json'\n",
    "    with open(file_str) as data_file:    \n",
    "        NYTimes_data = json.load(data_file)\n",
    "    \n",
    "    date_list = []\n",
    "    df = pd.DataFrame()  \n",
    "    df['News'] = None\n",
    "    \n",
    "\n",
    "    for i in range(len(NYTimes_data[\"response\"][\"docs\"][:])):\n",
    "        if NYTimes_data[\"response\"][\"docs\"][i][\"pub_date\"][:10] not in df.index:\n",
    "            df.loc[NYTimes_data[\"response\"][\"docs\"][i][\"pub_date\"][:10]] = NYTimes_data[\"response\"][\"docs\"][:][i]['headline']['main']\n",
    "        else:\n",
    "            df.loc[NYTimes_data[\"response\"][\"docs\"][i][\"pub_date\"][:10]] = df.loc[NYTimes_data[\"response\"][\"docs\"][i][\"pub_date\"][:10]].values + NYTimes_data[\"response\"][\"docs\"][:][i]['headline']['main']\n",
    "    \n",
    "    df.index = pd.to_datetime(df.index, format='%Y-%m-%d')\n",
    "    df.sort_index(inplace=True)\n",
    "    \n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Merge technical data with News"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def merge_news_price(df):\n",
    "    all_news = pd.DataFrame()\n",
    "    for i in range(1990, 2019):\n",
    "        for j in range(1, 13):\n",
    "            try:\n",
    "                temp = open_json(i, j)\n",
    "                all_news = all_news.append(temp)\n",
    "                print(\"Completed {}-{}\".format(i, j))\n",
    "            except:\n",
    "                pass\n",
    "            \n",
    "    all_news = pd.concat([df, all_news], axis=1)\n",
    "    \n",
    "    return all_news"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Completed 1990-1\n",
      "Completed 1990-2\n",
      "Completed 1990-3\n",
      "Completed 1990-4\n",
      "Completed 1990-5\n",
      "Completed 1990-6\n",
      "Completed 1990-7\n",
      "Completed 1990-8\n",
      "Completed 1990-9\n",
      "Completed 1990-10\n",
      "Completed 1990-11\n",
      "Completed 1990-12\n",
      "Completed 1991-1\n",
      "Completed 1991-2\n",
      "Completed 1991-3\n",
      "Completed 1991-4\n",
      "Completed 1991-5\n",
      "Completed 1991-6\n",
      "Completed 1991-7\n",
      "Completed 1991-8\n",
      "Completed 1991-9\n",
      "Completed 1991-10\n",
      "Completed 1991-11\n",
      "Completed 1991-12\n",
      "Completed 1992-1\n",
      "Completed 1992-2\n",
      "Completed 1992-3\n",
      "Completed 1992-4\n",
      "Completed 1992-5\n",
      "Completed 1992-6\n",
      "Completed 1992-7\n",
      "Completed 1992-8\n",
      "Completed 1992-9\n",
      "Completed 1992-10\n",
      "Completed 1992-11\n",
      "Completed 1992-12\n",
      "Completed 1993-1\n",
      "Completed 1993-2\n",
      "Completed 1993-3\n",
      "Completed 1993-4\n",
      "Completed 1993-5\n",
      "Completed 1993-6\n",
      "Completed 1993-7\n",
      "Completed 1993-8\n",
      "Completed 1993-9\n",
      "Completed 1993-10\n",
      "Completed 1993-11\n",
      "Completed 1993-12\n",
      "Completed 1994-1\n",
      "Completed 1994-2\n",
      "Completed 1994-3\n",
      "Completed 1994-4\n",
      "Completed 1994-5\n",
      "Completed 1994-6\n",
      "Completed 1994-7\n",
      "Completed 1994-8\n",
      "Completed 1994-9\n",
      "Completed 1994-10\n",
      "Completed 1994-11\n",
      "Completed 1994-12\n",
      "Completed 1995-1\n",
      "Completed 1995-2\n",
      "Completed 1995-3\n",
      "Completed 1995-4\n",
      "Completed 1995-5\n",
      "Completed 1995-6\n",
      "Completed 1995-7\n",
      "Completed 1995-8\n",
      "Completed 1995-9\n",
      "Completed 1995-10\n",
      "Completed 1995-11\n",
      "Completed 1995-12\n",
      "Completed 1996-1\n",
      "Completed 1996-2\n",
      "Completed 1996-3\n",
      "Completed 1996-4\n",
      "Completed 1996-5\n",
      "Completed 1996-6\n",
      "Completed 1996-7\n",
      "Completed 1996-8\n",
      "Completed 1996-9\n",
      "Completed 1996-10\n",
      "Completed 1996-11\n",
      "Completed 1996-12\n",
      "Completed 1997-1\n",
      "Completed 1997-2\n",
      "Completed 1997-3\n",
      "Completed 1997-4\n",
      "Completed 1997-5\n",
      "Completed 1997-6\n",
      "Completed 1997-7\n",
      "Completed 1997-8\n",
      "Completed 1997-9\n",
      "Completed 1997-10\n",
      "Completed 1997-11\n",
      "Completed 1997-12\n",
      "Completed 1998-1\n",
      "Completed 1998-2\n",
      "Completed 1998-3\n",
      "Completed 1998-4\n",
      "Completed 1998-5\n",
      "Completed 1998-6\n",
      "Completed 1998-7\n",
      "Completed 1998-8\n",
      "Completed 1998-9\n",
      "Completed 1998-10\n",
      "Completed 1998-11\n",
      "Completed 1998-12\n",
      "Completed 1999-1\n",
      "Completed 1999-2\n",
      "Completed 1999-3\n",
      "Completed 1999-4\n",
      "Completed 1999-5\n",
      "Completed 1999-6\n",
      "Completed 1999-7\n",
      "Completed 1999-8\n",
      "Completed 1999-9\n",
      "Completed 1999-10\n",
      "Completed 1999-11\n",
      "Completed 1999-12\n",
      "Completed 2000-1\n",
      "Completed 2000-2\n",
      "Completed 2000-3\n",
      "Completed 2000-4\n",
      "Completed 2000-5\n",
      "Completed 2000-6\n",
      "Completed 2000-7\n",
      "Completed 2000-8\n",
      "Completed 2000-9\n",
      "Completed 2000-10\n",
      "Completed 2000-11\n",
      "Completed 2000-12\n",
      "Completed 2001-1\n",
      "Completed 2001-2\n",
      "Completed 2001-3\n",
      "Completed 2001-4\n",
      "Completed 2001-5\n",
      "Completed 2001-6\n",
      "Completed 2001-7\n",
      "Completed 2001-8\n",
      "Completed 2001-9\n",
      "Completed 2001-10\n",
      "Completed 2001-11\n",
      "Completed 2001-12\n",
      "Completed 2002-1\n",
      "Completed 2002-2\n",
      "Completed 2002-3\n",
      "Completed 2002-4\n",
      "Completed 2002-5\n",
      "Completed 2002-6\n",
      "Completed 2002-7\n",
      "Completed 2002-8\n",
      "Completed 2002-9\n",
      "Completed 2002-10\n",
      "Completed 2002-11\n",
      "Completed 2002-12\n",
      "Completed 2003-1\n",
      "Completed 2003-2\n",
      "Completed 2003-3\n",
      "Completed 2003-4\n",
      "Completed 2003-5\n",
      "Completed 2003-6\n",
      "Completed 2003-7\n",
      "Completed 2003-8\n",
      "Completed 2003-9\n",
      "Completed 2003-10\n",
      "Completed 2003-11\n",
      "Completed 2003-12\n",
      "Completed 2004-1\n",
      "Completed 2004-2\n",
      "Completed 2004-3\n",
      "Completed 2004-4\n",
      "Completed 2004-5\n",
      "Completed 2004-6\n",
      "Completed 2004-7\n",
      "Completed 2004-8\n",
      "Completed 2004-9\n",
      "Completed 2004-10\n",
      "Completed 2004-11\n",
      "Completed 2004-12\n",
      "Completed 2005-1\n",
      "Completed 2005-2\n",
      "Completed 2005-3\n",
      "Completed 2005-4\n",
      "Completed 2005-5\n",
      "Completed 2005-6\n",
      "Completed 2005-7\n",
      "Completed 2005-8\n",
      "Completed 2005-9\n",
      "Completed 2005-10\n",
      "Completed 2005-11\n",
      "Completed 2005-12\n",
      "Completed 2007-12\n",
      "Completed 2008-1\n",
      "Completed 2008-2\n",
      "Completed 2008-3\n",
      "Completed 2008-4\n",
      "Completed 2008-5\n",
      "Completed 2008-6\n",
      "Completed 2008-7\n",
      "Completed 2008-8\n",
      "Completed 2008-9\n",
      "Completed 2008-10\n",
      "Completed 2009-1\n",
      "Completed 2009-2\n",
      "Completed 2009-4\n",
      "Completed 2009-7\n",
      "Completed 2009-8\n",
      "Completed 2009-9\n",
      "Completed 2009-10\n",
      "Completed 2009-11\n",
      "Completed 2009-12\n",
      "Completed 2010-1\n",
      "Completed 2010-2\n",
      "Completed 2010-5\n",
      "Completed 2010-6\n",
      "Completed 2010-7\n",
      "Completed 2010-8\n",
      "Completed 2010-9\n",
      "Completed 2010-10\n",
      "Completed 2010-11\n",
      "Completed 2010-12\n",
      "Completed 2011-1\n",
      "Completed 2011-2\n",
      "Completed 2011-3\n",
      "Completed 2011-4\n",
      "Completed 2011-5\n",
      "Completed 2011-6\n",
      "Completed 2011-7\n",
      "Completed 2011-8\n",
      "Completed 2011-9\n",
      "Completed 2011-10\n",
      "Completed 2011-11\n",
      "Completed 2012-1\n",
      "Completed 2012-3\n",
      "Completed 2012-4\n",
      "Completed 2012-5\n",
      "Completed 2012-6\n",
      "Completed 2012-7\n",
      "Completed 2012-8\n",
      "Completed 2012-9\n",
      "Completed 2012-10\n",
      "Completed 2012-11\n",
      "Completed 2012-12\n",
      "Completed 2013-1\n",
      "Completed 2013-4\n",
      "Completed 2013-5\n",
      "Completed 2013-6\n",
      "Completed 2013-7\n",
      "Completed 2013-8\n",
      "Completed 2013-9\n",
      "Completed 2013-10\n",
      "Completed 2013-11\n",
      "Completed 2013-12\n",
      "Completed 2014-1\n",
      "Completed 2014-2\n",
      "Completed 2014-3\n",
      "Completed 2014-4\n",
      "Completed 2014-5\n",
      "Completed 2014-6\n",
      "Completed 2014-7\n",
      "Completed 2014-9\n",
      "Completed 2014-10\n",
      "Completed 2014-11\n",
      "Completed 2014-12\n",
      "Completed 2015-1\n",
      "Completed 2015-2\n",
      "Completed 2015-3\n",
      "Completed 2015-4\n",
      "Completed 2015-5\n",
      "Completed 2015-6\n",
      "Completed 2015-7\n",
      "Completed 2015-8\n",
      "Completed 2015-9\n",
      "Completed 2015-10\n",
      "Completed 2015-11\n",
      "Completed 2015-12\n",
      "Completed 2016-1\n",
      "Completed 2016-2\n",
      "Completed 2016-3\n",
      "Completed 2016-4\n",
      "Completed 2016-5\n",
      "Completed 2016-6\n",
      "Completed 2016-7\n",
      "Completed 2016-8\n",
      "Completed 2016-9\n",
      "Completed 2016-10\n",
      "Completed 2016-11\n",
      "Completed 2016-12\n",
      "Completed 2017-1\n",
      "Completed 2017-2\n",
      "Completed 2017-3\n",
      "Completed 2017-4\n",
      "Completed 2017-5\n",
      "Completed 2017-6\n",
      "Completed 2017-7\n",
      "Completed 2017-8\n",
      "Completed 2017-9\n",
      "Completed 2017-10\n",
      "Completed 2017-11\n",
      "Completed 2017-12\n",
      "Completed 2018-1\n",
      "Completed 2018-2\n",
      "Completed 2018-3\n",
      "Completed 2018-4\n",
      "Completed 2018-5\n",
      "Completed 2018-6\n",
      "Completed 2018-7\n",
      "Completed 2018-8\n",
      "Completed 2018-9\n",
      "Completed 2018-10\n"
     ]
    }
   ],
   "source": [
    "data = merge_news_price(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Open</th>\n",
       "      <th>High</th>\n",
       "      <th>Low</th>\n",
       "      <th>Close</th>\n",
       "      <th>Adj Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>lrets</th>\n",
       "      <th>MACD</th>\n",
       "      <th>stochastics</th>\n",
       "      <th>ATR</th>\n",
       "      <th>News</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2018-09-12</th>\n",
       "      <td>2888.290039</td>\n",
       "      <td>2894.649902</td>\n",
       "      <td>2879.199951</td>\n",
       "      <td>2888.919922</td>\n",
       "      <td>2888.919922</td>\n",
       "      <td>3.264930e+09</td>\n",
       "      <td>0.000357</td>\n",
       "      <td>-3.785216</td>\n",
       "      <td>55.976006</td>\n",
       "      <td>15.449951</td>\n",
       "      <td>Residents Prepare for Hurricane FlorenceEdgy I...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-13</th>\n",
       "      <td>2896.850098</td>\n",
       "      <td>2906.760010</td>\n",
       "      <td>2896.389893</td>\n",
       "      <td>2904.179932</td>\n",
       "      <td>2904.179932</td>\n",
       "      <td>3.254930e+09</td>\n",
       "      <td>0.005268</td>\n",
       "      <td>-3.030618</td>\n",
       "      <td>82.719774</td>\n",
       "      <td>17.840088</td>\n",
       "      <td>Stand. Run. And Run Some More.U.S. Aid Program...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-14</th>\n",
       "      <td>2906.379883</td>\n",
       "      <td>2908.300049</td>\n",
       "      <td>2895.770020</td>\n",
       "      <td>2904.979980</td>\n",
       "      <td>2904.979980</td>\n",
       "      <td>3.149800e+09</td>\n",
       "      <td>0.000275</td>\n",
       "      <td>-2.576736</td>\n",
       "      <td>84.121890</td>\n",
       "      <td>12.530029</td>\n",
       "      <td>The Playlist: Lana Del Rey’s Smoldering Return...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-15</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Companies Are Pushing for Less Disclosure. Is ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-16</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Katharine Verville, Edward GottfriedNina Sudar...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-17</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Emmys Red Carpet Photos 2018Fatal Shark Attack...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-18</th>\n",
       "      <td>2890.739990</td>\n",
       "      <td>2911.169922</td>\n",
       "      <td>2890.429932</td>\n",
       "      <td>2904.310059</td>\n",
       "      <td>2904.310059</td>\n",
       "      <td>3.074610e+09</td>\n",
       "      <td>0.005355</td>\n",
       "      <td>-2.451980</td>\n",
       "      <td>77.030322</td>\n",
       "      <td>20.739990</td>\n",
       "      <td>The Emmys Joked About TV’s Lack of Diversity, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-19</th>\n",
       "      <td>2906.600098</td>\n",
       "      <td>2912.360107</td>\n",
       "      <td>2903.820068</td>\n",
       "      <td>2907.949951</td>\n",
       "      <td>2907.949951</td>\n",
       "      <td>3.280020e+09</td>\n",
       "      <td>0.001252</td>\n",
       "      <td>-2.251064</td>\n",
       "      <td>85.623058</td>\n",
       "      <td>8.540039</td>\n",
       "      <td>What’s on TV Wednesday: ‘The Greatest Showman’...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-20</th>\n",
       "      <td>2919.729980</td>\n",
       "      <td>2934.800049</td>\n",
       "      <td>2919.729980</td>\n",
       "      <td>2930.750000</td>\n",
       "      <td>2930.750000</td>\n",
       "      <td>3.337730e+09</td>\n",
       "      <td>0.007810</td>\n",
       "      <td>-0.624690</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>26.850098</td>\n",
       "      <td>Household ChoresOn Politics: Stalemate Over Ka...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-21</th>\n",
       "      <td>2936.760010</td>\n",
       "      <td>2940.909912</td>\n",
       "      <td>2927.110107</td>\n",
       "      <td>2929.669922</td>\n",
       "      <td>2929.669922</td>\n",
       "      <td>5.607610e+09</td>\n",
       "      <td>-0.000369</td>\n",
       "      <td>0.241159</td>\n",
       "      <td>98.171531</td>\n",
       "      <td>13.799805</td>\n",
       "      <td>What’s on TV Friday: ‘Quincy’ and ‘Jack White:...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-22</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Why Does the Tennis Season End Before It’s Ove...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-23</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>‘America First’ Has WonKavanaugh Was Supposed ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-24</th>\n",
       "      <td>2921.830078</td>\n",
       "      <td>2923.790039</td>\n",
       "      <td>2912.629883</td>\n",
       "      <td>2919.370117</td>\n",
       "      <td>2919.370117</td>\n",
       "      <td>3.372210e+09</td>\n",
       "      <td>-0.003522</td>\n",
       "      <td>-0.119090</td>\n",
       "      <td>80.734942</td>\n",
       "      <td>17.040039</td>\n",
       "      <td>The Trouble With South Sudan’s New Peace DealC...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-25</th>\n",
       "      <td>2921.750000</td>\n",
       "      <td>2923.949951</td>\n",
       "      <td>2913.699951</td>\n",
       "      <td>2915.560059</td>\n",
       "      <td>2915.560059</td>\n",
       "      <td>3.285480e+09</td>\n",
       "      <td>-0.001306</td>\n",
       "      <td>-0.866335</td>\n",
       "      <td>74.284876</td>\n",
       "      <td>10.250000</td>\n",
       "      <td>Corrections: September 25, 2018Do You Want You...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-26</th>\n",
       "      <td>2916.979980</td>\n",
       "      <td>2931.149902</td>\n",
       "      <td>2903.280029</td>\n",
       "      <td>2905.969971</td>\n",
       "      <td>2905.969971</td>\n",
       "      <td>3.388620e+09</td>\n",
       "      <td>-0.003295</td>\n",
       "      <td>-2.276606</td>\n",
       "      <td>58.049771</td>\n",
       "      <td>27.869873</td>\n",
       "      <td>Braves Jump on Mets’ Bullpen for Sixth Straigh...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-27</th>\n",
       "      <td>2911.649902</td>\n",
       "      <td>2927.219971</td>\n",
       "      <td>2909.270020</td>\n",
       "      <td>2914.000000</td>\n",
       "      <td>2914.000000</td>\n",
       "      <td>3.060850e+09</td>\n",
       "      <td>0.002759</td>\n",
       "      <td>-2.841769</td>\n",
       "      <td>71.643845</td>\n",
       "      <td>21.250000</td>\n",
       "      <td>‘I Could Be Doing This All Day.’ Trump Delight...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-28</th>\n",
       "      <td>2910.030029</td>\n",
       "      <td>2920.530029</td>\n",
       "      <td>2907.500000</td>\n",
       "      <td>2913.979980</td>\n",
       "      <td>2913.979980</td>\n",
       "      <td>3.432300e+09</td>\n",
       "      <td>-0.000007</td>\n",
       "      <td>-3.367701</td>\n",
       "      <td>68.724388</td>\n",
       "      <td>13.030029</td>\n",
       "      <td>Tu resumen de noticias del viernesSyphilis Ris...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-29</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Corrections: September 29, 2018How to Know if ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-09-30</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Katherine Whitman, Frank Broomell Jr.Rose Scha...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-10-01</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>How to win your first three months on the jobI...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-10-02</th>\n",
       "      <td>2923.800049</td>\n",
       "      <td>2931.419922</td>\n",
       "      <td>2919.370117</td>\n",
       "      <td>2923.429932</td>\n",
       "      <td>2923.429932</td>\n",
       "      <td>3.401880e+09</td>\n",
       "      <td>-0.000397</td>\n",
       "      <td>-3.165455</td>\n",
       "      <td>82.921023</td>\n",
       "      <td>17.439942</td>\n",
       "      <td>Jason Kander Withdraws From Kansas City Mayora...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-10-03</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>On Politics: Trump Empire Was Built On Suspect...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-10-04</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>My Human DoctorT’s Best Photos From Paris Fash...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-10-05</th>\n",
       "      <td>2902.540039</td>\n",
       "      <td>2909.639893</td>\n",
       "      <td>2869.290039</td>\n",
       "      <td>2885.570068</td>\n",
       "      <td>2885.570068</td>\n",
       "      <td>3.328980e+09</td>\n",
       "      <td>-0.005543</td>\n",
       "      <td>-5.849199</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>54.139893</td>\n",
       "      <td>Protesters Pack the Senate BuildingT Suggests:...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-10-06</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F.B.I. Review of Kavanaugh Was Limited From th...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-10-07</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Amelia Sanchez-Moran, Nicholas AlexsovichBD Wo...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-10-08</th>\n",
       "      <td>2877.530029</td>\n",
       "      <td>2889.449951</td>\n",
       "      <td>2862.080078</td>\n",
       "      <td>2884.429932</td>\n",
       "      <td>2884.429932</td>\n",
       "      <td>3.330320e+09</td>\n",
       "      <td>-0.000395</td>\n",
       "      <td>-7.839346</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>27.369873</td>\n",
       "      <td>New U.N. Climate Report Says Put a High Price ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-10-09</th>\n",
       "      <td>2882.510010</td>\n",
       "      <td>2894.830078</td>\n",
       "      <td>2874.270020</td>\n",
       "      <td>2880.340088</td>\n",
       "      <td>2880.340088</td>\n",
       "      <td>3.520500e+09</td>\n",
       "      <td>-0.001419</td>\n",
       "      <td>-9.479757</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>20.560058</td>\n",
       "      <td>Kanye West Expected to Visit Trump at the Whit...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-10-10</th>\n",
       "      <td>2873.899902</td>\n",
       "      <td>2874.020020</td>\n",
       "      <td>2784.860107</td>\n",
       "      <td>2785.679932</td>\n",
       "      <td>2785.679932</td>\n",
       "      <td>4.501250e+09</td>\n",
       "      <td>-0.033416</td>\n",
       "      <td>-17.312894</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>95.479981</td>\n",
       "      <td>Learning With: ‘Banksy Painting Self-Destructs...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-10-11</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Corrections: October 11, 2018Khashoggi’s Disap...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   Open         High          Low        Close    Adj Close  \\\n",
       "2018-09-12  2888.290039  2894.649902  2879.199951  2888.919922  2888.919922   \n",
       "2018-09-13  2896.850098  2906.760010  2896.389893  2904.179932  2904.179932   \n",
       "2018-09-14  2906.379883  2908.300049  2895.770020  2904.979980  2904.979980   \n",
       "2018-09-15          NaN          NaN          NaN          NaN          NaN   \n",
       "2018-09-16          NaN          NaN          NaN          NaN          NaN   \n",
       "2018-09-17          NaN          NaN          NaN          NaN          NaN   \n",
       "2018-09-18  2890.739990  2911.169922  2890.429932  2904.310059  2904.310059   \n",
       "2018-09-19  2906.600098  2912.360107  2903.820068  2907.949951  2907.949951   \n",
       "2018-09-20  2919.729980  2934.800049  2919.729980  2930.750000  2930.750000   \n",
       "2018-09-21  2936.760010  2940.909912  2927.110107  2929.669922  2929.669922   \n",
       "2018-09-22          NaN          NaN          NaN          NaN          NaN   \n",
       "2018-09-23          NaN          NaN          NaN          NaN          NaN   \n",
       "2018-09-24  2921.830078  2923.790039  2912.629883  2919.370117  2919.370117   \n",
       "2018-09-25  2921.750000  2923.949951  2913.699951  2915.560059  2915.560059   \n",
       "2018-09-26  2916.979980  2931.149902  2903.280029  2905.969971  2905.969971   \n",
       "2018-09-27  2911.649902  2927.219971  2909.270020  2914.000000  2914.000000   \n",
       "2018-09-28  2910.030029  2920.530029  2907.500000  2913.979980  2913.979980   \n",
       "2018-09-29          NaN          NaN          NaN          NaN          NaN   \n",
       "2018-09-30          NaN          NaN          NaN          NaN          NaN   \n",
       "2018-10-01          NaN          NaN          NaN          NaN          NaN   \n",
       "2018-10-02  2923.800049  2931.419922  2919.370117  2923.429932  2923.429932   \n",
       "2018-10-03          NaN          NaN          NaN          NaN          NaN   \n",
       "2018-10-04          NaN          NaN          NaN          NaN          NaN   \n",
       "2018-10-05  2902.540039  2909.639893  2869.290039  2885.570068  2885.570068   \n",
       "2018-10-06          NaN          NaN          NaN          NaN          NaN   \n",
       "2018-10-07          NaN          NaN          NaN          NaN          NaN   \n",
       "2018-10-08  2877.530029  2889.449951  2862.080078  2884.429932  2884.429932   \n",
       "2018-10-09  2882.510010  2894.830078  2874.270020  2880.340088  2880.340088   \n",
       "2018-10-10  2873.899902  2874.020020  2784.860107  2785.679932  2785.679932   \n",
       "2018-10-11          NaN          NaN          NaN          NaN          NaN   \n",
       "\n",
       "                  Volume     lrets       MACD  stochastics        ATR  \\\n",
       "2018-09-12  3.264930e+09  0.000357  -3.785216    55.976006  15.449951   \n",
       "2018-09-13  3.254930e+09  0.005268  -3.030618    82.719774  17.840088   \n",
       "2018-09-14  3.149800e+09  0.000275  -2.576736    84.121890  12.530029   \n",
       "2018-09-15           NaN       NaN        NaN          NaN        NaN   \n",
       "2018-09-16           NaN       NaN        NaN          NaN        NaN   \n",
       "2018-09-17           NaN       NaN        NaN          NaN        NaN   \n",
       "2018-09-18  3.074610e+09  0.005355  -2.451980    77.030322  20.739990   \n",
       "2018-09-19  3.280020e+09  0.001252  -2.251064    85.623058   8.540039   \n",
       "2018-09-20  3.337730e+09  0.007810  -0.624690   100.000000  26.850098   \n",
       "2018-09-21  5.607610e+09 -0.000369   0.241159    98.171531  13.799805   \n",
       "2018-09-22           NaN       NaN        NaN          NaN        NaN   \n",
       "2018-09-23           NaN       NaN        NaN          NaN        NaN   \n",
       "2018-09-24  3.372210e+09 -0.003522  -0.119090    80.734942  17.040039   \n",
       "2018-09-25  3.285480e+09 -0.001306  -0.866335    74.284876  10.250000   \n",
       "2018-09-26  3.388620e+09 -0.003295  -2.276606    58.049771  27.869873   \n",
       "2018-09-27  3.060850e+09  0.002759  -2.841769    71.643845  21.250000   \n",
       "2018-09-28  3.432300e+09 -0.000007  -3.367701    68.724388  13.030029   \n",
       "2018-09-29           NaN       NaN        NaN          NaN        NaN   \n",
       "2018-09-30           NaN       NaN        NaN          NaN        NaN   \n",
       "2018-10-01           NaN       NaN        NaN          NaN        NaN   \n",
       "2018-10-02  3.401880e+09 -0.000397  -3.165455    82.921023  17.439942   \n",
       "2018-10-03           NaN       NaN        NaN          NaN        NaN   \n",
       "2018-10-04           NaN       NaN        NaN          NaN        NaN   \n",
       "2018-10-05  3.328980e+09 -0.005543  -5.849199     0.000000  54.139893   \n",
       "2018-10-06           NaN       NaN        NaN          NaN        NaN   \n",
       "2018-10-07           NaN       NaN        NaN          NaN        NaN   \n",
       "2018-10-08  3.330320e+09 -0.000395  -7.839346     0.000000  27.369873   \n",
       "2018-10-09  3.520500e+09 -0.001419  -9.479757     0.000000  20.560058   \n",
       "2018-10-10  4.501250e+09 -0.033416 -17.312894     0.000000  95.479981   \n",
       "2018-10-11           NaN       NaN        NaN          NaN        NaN   \n",
       "\n",
       "                                                         News  \n",
       "2018-09-12  Residents Prepare for Hurricane FlorenceEdgy I...  \n",
       "2018-09-13  Stand. Run. And Run Some More.U.S. Aid Program...  \n",
       "2018-09-14  The Playlist: Lana Del Rey’s Smoldering Return...  \n",
       "2018-09-15  Companies Are Pushing for Less Disclosure. Is ...  \n",
       "2018-09-16  Katharine Verville, Edward GottfriedNina Sudar...  \n",
       "2018-09-17  Emmys Red Carpet Photos 2018Fatal Shark Attack...  \n",
       "2018-09-18  The Emmys Joked About TV’s Lack of Diversity, ...  \n",
       "2018-09-19  What’s on TV Wednesday: ‘The Greatest Showman’...  \n",
       "2018-09-20  Household ChoresOn Politics: Stalemate Over Ka...  \n",
       "2018-09-21  What’s on TV Friday: ‘Quincy’ and ‘Jack White:...  \n",
       "2018-09-22  Why Does the Tennis Season End Before It’s Ove...  \n",
       "2018-09-23  ‘America First’ Has WonKavanaugh Was Supposed ...  \n",
       "2018-09-24  The Trouble With South Sudan’s New Peace DealC...  \n",
       "2018-09-25  Corrections: September 25, 2018Do You Want You...  \n",
       "2018-09-26  Braves Jump on Mets’ Bullpen for Sixth Straigh...  \n",
       "2018-09-27  ‘I Could Be Doing This All Day.’ Trump Delight...  \n",
       "2018-09-28  Tu resumen de noticias del viernesSyphilis Ris...  \n",
       "2018-09-29  Corrections: September 29, 2018How to Know if ...  \n",
       "2018-09-30  Katherine Whitman, Frank Broomell Jr.Rose Scha...  \n",
       "2018-10-01  How to win your first three months on the jobI...  \n",
       "2018-10-02  Jason Kander Withdraws From Kansas City Mayora...  \n",
       "2018-10-03  On Politics: Trump Empire Was Built On Suspect...  \n",
       "2018-10-04  My Human DoctorT’s Best Photos From Paris Fash...  \n",
       "2018-10-05  Protesters Pack the Senate BuildingT Suggests:...  \n",
       "2018-10-06  F.B.I. Review of Kavanaugh Was Limited From th...  \n",
       "2018-10-07  Amelia Sanchez-Moran, Nicholas AlexsovichBD Wo...  \n",
       "2018-10-08  New U.N. Climate Report Says Put a High Price ...  \n",
       "2018-10-09  Kanye West Expected to Visit Trump at the Whit...  \n",
       "2018-10-10  Learning With: ‘Banksy Painting Self-Destructs...  \n",
       "2018-10-11  Corrections: October 11, 2018Khashoggi’s Disap...  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.tail(30)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Export new data as an HDF5 file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/bf/anaconda3/envs/dasc/lib/python3.6/site-packages/pandas/core/generic.py:1996: PerformanceWarning: \n",
      "your performance may suffer as PyTables will pickle object types that it cannot\n",
      "map directly to c-types [inferred_type->mixed,key->block1_values] [items->['News']]\n",
      "\n",
      "  return pytables.to_hdf(path_or_buf, key, self, **kwargs)\n"
     ]
    }
   ],
   "source": [
    "data.to_hdf(\"data.h5\", 'data')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
